- Motivation
- Inferenzstatistik
- Item Response Theorie
- Bayes’sche IRT in R
- Showroom: birtms
- Methodik: Bayes’sche IRT-Modellierung
- R-Package-Premiere: birtms
- fachdidaktische Erkenntnisse
- birtms Tutorial
- Item Response Theorie
- Interesse an Statistik
15.9.2021
i1_easyness %>% birtms::plot_ppmc_distribution()
# i1_easyness <- brms::ranef(fit_1d_1pl_spm, summary = FALSE)$item[,"i1",1] head(i1_easyness, n = 24) %>% round(2)
## [1] 0.89 0.54 1.20 0.36 0.57 0.67 0.97 0.55 1.29 1.18 1.71 1.68 ## [13] 2.05 1.36 1.22 1.41 1.46 1.15 -0.26 0.26 0.43 0.10 0.42 0.71
\[P\left(1|\theta_k, \beta_i\right)=\frac{\exp\left(\theta_k + \beta_i\right)}{1+\exp\left(\theta_k + \beta_i\right)}\]
(data_spm_long <- data_spm %>%
birtms::compose_dataset(
response_columns = i1:i12))
## # A tibble: 5,988 x 3 ## person item response ## <chr> <chr> <dbl> ## 1 1 i1 1 ## 2 1 i2 1 ## 3 1 i3 1 ## 4 1 i4 0 ## 5 1 i5 1 ## 6 1 i6 1 ## 7 1 i7 0 ## 8 1 i8 0 ## 9 1 i9 0 ## 10 1 i10 0 ## # ... with 5,978 more rows
(formula1PL <- birtms::build_formula())
## response ~ 1 + (1 | person) + (1 | item)
(formula1PL_fw <- birtms::build_formula(
variable_specifications = list(
person_covariables_main_effect = "fw"
)))## response ~ 1 + (1 | person) + (1 | item) + fw
(formula2PL <- birtms::build_formula(model_specifications = list(item_parameter_number = 2)))
## response ~ skillintercept + exp(logalpha) * theta + beta ## skillintercept ~ 1 ## theta ~ 0 + (1 | person) ## logalpha ~ 1 + (1 | item) ## beta ~ 0 + (1 | item)
prior_1PL <- brms::prior("normal(0, 3)", class = "sd", group = "person") +
brms::prior("normal(0, 3)", class = "sd", group = "item")prior_1PL_gender <- brms::prior("normal(0, 3)", class = "sd", group = "person") +
brms::prior("normal(0, 3)", class = "sd", group = "item") +
brms::prior("constant(0)", class = "b", coef = "genderm") +
brms::prior("normal(1, 0.5)", class = "b", group = "genderw")data_spm_long_gender <- data_spm_long %>% mutate(
gender = ifelse(as.integer(person) %% 2 == 1, "m", "w"),
gender = ifelse(as.integer(person) %% 23 == 1, "d", gender))
brms::get_prior(formula = formula1PL_gender,
data = data_spm_long_gender)
## prior class coef group resp dpar nlpar bound source ## (flat) b default ## (flat) b genderm (vectorized) ## (flat) b genderw (vectorized) ## student_t(3, 0, 2.5) Intercept default ## student_t(3, 0, 2.5) sd default ## student_t(3, 0, 2.5) sd item (vectorized) ## student_t(3, 0, 2.5) sd Intercept item (vectorized) ## student_t(3, 0, 2.5) sd person (vectorized) ## student_t(3, 0, 2.5) sd Intercept person (vectorized)
| Variable | TAM | brms |
|---|---|---|
| Zeit in s | 0.06 | 57 + 81 |
| Iterationen | 36 | 4000 + 4000 + X |
| Speicher in MB | 0.46 | 34.73 |
| Werte je Parameter | 3 | 4000 |
fit_1d_1pl_spm <- birtms::birtm(data = data_spm_long,
formula = formula1PL,
prior = prior_1PL,
file = "models/fit_1d_1pl_spm",
cores = 4
)fit_1d_1pl_spm_gender <- birtms::birtm(data = data_spm_long,
formula = formula1PL_gender,
prior = prior_1PL_gender,
file = "models/fit_1d_1pl_spm_gender",
variable_specifications = list(
person_covariables_main_effect = "gender"
)
)fit_1d_2pl_spm <- birtms::birtm(data = data_spm_long,
formula = formula2PL,
prior = prior_2PL,
file = "models/fit_1d_2pl_spm",
model_specifications = list(
item_parameter_number = 2
)
)fit_1d_2pl_spm <- birtms::birtm_aio(data = data_spm,
response_columns = i1:i12,
prior = prior_2PL,
file = "models/fit_1d_2pl_spm",
model_specifications = list(
item_parameter_number = 2
)
)plot(fit_1d_1pl_spm)
summary(fit_1d_1pl_spm, robust = TRUE)
## Family: bernoulli ## Links: mu = logit ## Formula: response ~ 1 + (1 | person) + (1 | item) ## Data: data (Number of observations: 5988) ## Draws: 4 chains, each with iter = 2000; warmup = 1000; thin = 1; ## total post-warmup draws = 4000 ## ## Group-Level Effects: ## ~item (Number of levels: 12) ## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS ## sd(Intercept) 1.49 0.32 1.02 2.46 1.00 1193 2063 ## ## ~person (Number of levels: 499) ## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS ## sd(Intercept) 1.68 0.08 1.54 1.85 1.00 1561 2524 ## ## Population-Level Effects: ## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS ## Intercept 1.05 0.43 0.18 2.02 1.01 682 1213 ## ## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS ## and Tail_ESS are effective sample size measures, and Rhat is the potential ## scale reduction factor on split chains (at convergence, Rhat = 1).
bayesplot::mcmc_trace(
fit_1d_1pl_spm, pars = c("b_Intercept")
)
cowplot::plot_grid(
birtms::plot_itemparameter(
fit_1d_2pl_spm, pars = "slope",
style = "halfeye", items = c(1,4),
alphacut = c(.4, .6, 2),
thresholds = c(0,4)),
birtms::plot_itemparameter(
fit_1d_2pl_spm, pars = "difficulty",
style = "halfeye", items = c(1,4),
thresholds = c(-3,3)),
nrow = 2
)
g_spm_1pl <- birtms::ICC_check(fit_1d_1pl_spm, post_responses = posterior_responses_spm_1pl,
num_groups = 6, item_id = 12)
g_spm_2pl <- birtms::ICC_check(fit_1d_2pl_spm, post_responses = posterior_responses_spm_1pl,
num_groups = 6, item_id = 12)
g_spm_2pl <- birtms::ICC_check(fit_1d_2pl_spm, post_responses = posterior_responses_spm_1pl,
num_groups = 10, item_id = 12)
ppmc_data_spm_1pl_mixed <- fit_1d_1pl_spm %>% birtms::get_ppmcdatasets( ppmcMethod = 'M', post_responses = posterior_responses_spm_1pl) ppmc_data_spm_1pl_outfit_mixed <- fit_1d_1pl_spm %>% birtms::get_ppmccriteria( ppmcdata = ppmc_data_spm_1pl_mixed, ppmcMethod = 'M', criteria = 'outfit') outfit_skim_1pl <- ppmc_data_spm_1pl_outfit_mixed %>% group_by(item) %>% select(item, crit) %>% birtms::custom_skim() %>% rename_all(~str_remove(.x, "numeric.")) %>% mutate_if(is.numeric, ~round(.x, 2))
outfit_skim_1pl %>% select(item, mean, mode, hdi.lower, hdi.upper)
## # A tibble: 12 x 5 ## item mean mode hdi.lower hdi.upper ## <chr> <dbl> <dbl> <dbl> <dbl> ## 1 i1 1.73 1.53 1.27 2.16 ## 2 i10 0.887 0.861 0.780 0.986 ## 3 i11 1.79 1.66 1.22 2.27 ## 4 i12 1.98 1.83 1.37 2.55 ## 5 i2 0.825 0.701 0.487 1.11 ## 6 i3 1.11 1.02 0.753 1.41 ## 7 i4 0.611 0.581 0.495 0.726 ## 8 i5 0.533 0.495 0.415 0.639 ## 9 i6 0.878 0.828 0.650 1.08 ## 10 i7 1.05 0.988 0.864 1.23 ## 11 i8 0.974 0.955 0.835 1.10 ## 12 i9 1.20 1.17 1.00 1.38
birtms::plot_fit_statistic(model = fit_1d_1pl_spm, data = ppmc_data_spm_1pl_outfit_mixed,
units = c(1,12), ppmcMethod = 'M')
ppmc_infit_data <- setNames(data.frame(ppmc_data_spm_1pl_outfit_mixed[,c(1,5)],
ppmc_data_spm_2pl_outfit_mixed[,c(5)]), c('item', '1pl', '2pl'))
ggplot(data = ppmc_infit_data) +
geom_density(aes(`1pl`), colour = '#8b7d6b70', fill = '#8b7d6b70', alpha = 0.3) +
geom_density(aes(`2pl`), colour = '#008b4570', fill = '#008b4570', alpha = 0.3) +
facet_wrap("item", scales = "free") + xlim(c(-2,2))fit_1d_1pl_spm %>% birtms::plot_wrightmap(classic = FALSE)
ordata_1pl_spm <- fit_1d_1pl_spm %>% birtms::get_or(zero_correction = 'Haldane')
ordata_1pl_spm_fullbayes <- fit_1d_1pl_spm %>% birtms::get_or(zero_correction = 'Bayes',
ci_method = 'BayesHDI')
ordata_2pl_spm <- fit_1d_2pl_spm %>% birtms::get_or(zero_correction = 'Haldane')
ordata_2pl_spm_fullbayes <- fit_1d_2pl_spm %>% birtms::get_or(zero_correction = 'Bayes',
ci_method = 'BayesHDI')
ordata_1pl_spm_fullbayes %>% birtms::plot_ppmc_or_heatmap()
ordata_2pl_spm_fullbayes %>% birtms::plot_ppmc_or_heatmap()
ordata_1pl_spm_fullbayes %>% birtms::plot_or_heatmap()
ordata_2pl_spm_fullbayes %>% birtms::plot_or_heatmap(model = fit_1d_2pl_spm)
ordata_1pl_spm_fullbayes %>% birtms::plot_or_heatmap(model = fit_1d_2pl_spm, bayesian = TRUE)
ordata_2pl_spm_fullbayes %>% birtms::plot_or_heatmap(model = fit_1d_2pl_spm)
ppmc_data_spm_1pl <- fit_1d_1pl_spm %>%
birtms::get_ppmcdatasets(
ppmcMethod = 'C', post_responses = posterior_responses_spm_1pl
)
ppmc_data_spm_1pl_person_ll <- fit_1d_1pl_spm %>%
birtms::get_ppmccriteria(
ppmcdata = ppmc_data_spm_1pl, ppmcMethod = 'C', criteria = 'll',
group = .$var_specs$person
)
fit_1d_1pl_spm %>% birtms::plot_fit_statistic(
data = ppmc_data_spm_1pl_person_ll, units = c(1,12), group = .$var_specs$person
)
respfuncdata_1pl <- fit_1d_1pl_spm %>%birtms::calc_personresponsedata( post_responses = posterior_responses_spm_1pl) birtms::plot_personresponsefunction(fit_1d_1pl_spm, respfuncdata_1pl, id = c(2:4))
marg_loo1pl <- birtms::loo_marginal(fit_1d_1pl_spm) marg_loo1pl <- birtms::loo_marginal(fit_1d_2pl_spm)
loo::loo_compare(marg_loo1pl, marg_loo2pl)
## elpd_diff se_diff ## model2 0.0 0.0 ## model1 -25.4 11.1
Münzwurf: 0, 1, 1, 1, 1, 1, 1, 1, 1
\(X \sim binom(n, p)\)
| Aspekt | Klassisch / Frequentistisch | Bayes’sch / Bayesianisch |
|---|---|---|
| Wahrscheinlichkeit | relative Häufigkeit | Grad persönlicher Überzeugung |
| Vorannahme | Münze fair | Münze fair (SD = .10) |
Münzwurf: 0, 1, 1, 1, 1, 1, 1, 1, 1
\(X \sim binom(n, p)\)
| Aspekt | Klassisch / Frequentistisch | Bayes’sch / Bayesianisch |
|---|---|---|
| Wahrscheinlichkeit | relative Häufigkeit | Grad persönlicher Überzeugung |
| Vorannahme | Münze fair | Münze fair (SD = .10) |
| P(X=1) = | 0.89 [0.69, 1] | |
| Hypothesentest | 0.021 | |
| Entscheidung | nicht fair |
Münzwurf: 0, 1, 1, 1, 1, 1, 1, 1, 1
\(X \sim binom(n, p)\)
| Aspekt | Klassisch / Frequentistisch | Bayes’sch / Bayesianisch |
|---|---|---|
| Wahrscheinlichkeit | relative Häufigkeit | Grad persönlicher Überzeugung |
| Vorannahme | Münze fair | Münze fair (SD = .10) |
| P(X=1) = | 0.89 [0.69, 1] | |
| Hypothesentest | 0.021 | |
| Entscheidung | nicht fair |
Münzwurf: 0, 1, 1, 1, 1, 1, 1, 1, 1
\(X \sim binom(n, p)\)
| Aspekt | Klassisch / Frequentistisch | Bayes’sch / Bayesianisch |
|---|---|---|
| Wahrscheinlichkeit | relative Häufigkeit | Grad persönlicher Überzeugung |
| Vorannahme | Münze fair | Münze fair (SD = .29) |
| P(X=1) = | 0.89 [0.69, 1] | |
| Hypothesentest | 0.021 | |
| Entscheidung | nicht fair |
Modell: \(X \sim binom(n, p)\)
Beendigung der Datenerhebung nach:
p-value: Die Wahrscheinlichkeit in einer Stichprobe der hypothetischen Population ein Ergebnis zu erhalten, dass mindestens so extrem wie das vorliegende Ergebnis ist, wobei die beabsichtigten Erhebungs- und Auswertungsmethoden berücksichtigt werden müssen.
zweite Münze 24 mal werfen: \(p\left(\frac{z_1}{N_1}\right) = 0.063\)
zweite Münze 12 mal werfen: \(p\left(\frac{z_1}{N_1}\right) = 0.103\)
Bürkner, P.-C. (2019). Bayesian Item Response Modeling in R with brms and Stan. Verfügbar unter: https://arxiv.org/pdf/1905.09501
Bürkner, P.-C. (2020). Analysing Standard Progressive Matrices (SPM-LS) with Bayesian Item Response Models. Journal of Intelligence, 8(1). https://doi.org/10.3390/jintelligence8010005
Clark, M. (2018). Bayesian Basics. Verfügbar unter: https://m-clark.github.io/bayesian-basics/diagnostics.html
Fujimoto, K. A. & Neugebauer, S. R. (2020). A General Bayesian Multidimensional Item Response Theory Model for Small and Large Samples. Educational and psychological measurement, 80(4), 665–694. https://doi.org/10.1177/0013164419891205
Grottke, T., Möhrke, P. & Marvin, R. (2021). Statistische Analysen mit R in den MINT-Didaktiken Eine Tutorial-Sammlung: 4 Praxisorientierte Einf ührung in die Item-Response-Theorie mit dem Fokus auf das Rasch-Modell. Verfügbar unter: https://dbuschhue.github.io/P4-Worflow/praxisorientierte-einf
Haberman, S. J., Holland, P. W. & Sinharay, S. (2007). Limits on Log Odds Ratios for Unidimensional Item Response Theory Models. Psychometrika, 72(4), 551–561. https://doi.org/10.1007/S11336-007-9009-0
Hyvönen, V. H. & Tolonen, T. (2019). Chapter 3 Summarizing the posterior distribution | Bayesian Inference 2019. Verfügbar unter: https://vioshyvo.github.io/Bayesian_inference/summarizing-the-posterior-distribution.html
JASP Team. (2020). JASP (Version 0.14.1)[Computer software]. Verfügbar unter: https://jasp-stats.org/
Jeon, M. & Rijmen, F. (2016). A modular approach for item response theory modeling with the R package flirt. Behavior research methods, 48(2), 742–755. https://doi.org/10.3758/s13428-015-0606-z
Kruschke, J. K. (2015). Doing Bayesian data analysis: A tutorial with R, JAGS, and Stan (2. ed.). Amsterdam: AP Academic Press/Elsevier. Verfügbar unter: http://www.contentreserve.com/TitleInfo.asp?ID=38F45CF6-6B5C-433C-85F8-A3568420927D&Format=50
Kruschke, J. K. & Liddell, T. M. (2018). The Bayesian New Statistics: Hypothesis testing, estimation, meta-analysis, and power analysis from a Bayesian perspective. Psychonomic Bulletin & Review, 25(1), 178–206. https://doi.org/10.3758/s13423-016-1221-4
Kruschke, J. K. & Liddell, T. M. (2018). The Bayesian New Statistics: Hypothesis testing, estimation, meta-analysis, and power analysis from a Bayesian perspective. Psychonomic Bulletin & Review, 25(1), 178–206. https://doi.org/10.3758/s13423-016-1221-4
Meehl, P. E. (1997). The Problem Is Epistemology, Not Statistics: Replace Significance Tests by Confidence Intervals and Quantify Accuracy of Risky Numerical Predictions (Multivariate Applications Series). In L.L. Harlow, S.A. Mulaik & J.H. Steiger (Hrsg.), What If There Were No Significance Tests? (S. 393–425). Hoboken: Taylor and Francis. Verfügbar unter: https://meehl.umn.edu/sites/meehl.umn.edu/files/files/169problemisepistemology.pdf
Merkle, E. C., Furr, D. & Rabe-Hesketh, S. (2019). Bayesian Comparison of Latent Variable Models: Conditional Versus Marginal Likelihoods. Psychometrika, 84(3), 802–829. https://doi.org/10.1007/s11336-019-09679-0
Scharl, A. & Gnambs, T. (2019). Longitudinal item response modeling and posterior predictive checking in R and Stan. The Quantitative Methods for Psychology, 15(2), 75–95. https://doi.org/10.20982/tqmp.15.2.p075
Waning, B., Montagne, M., McCloskey, W. W. & Maki, R. A. (2001). Pharmacoepidemiology: Principles and practice. New York: McGraw-Hill. Verfügbar unter: http://www.loc.gov/catdir/bios/mh041/00045207.html
Zhang, O., Miller, D. & Cannady, M. (2011). A Model Evaluation When Associations Exists Across Testlets under Small Testlet Size Situations. Verfügbar unter: https://ouzhang.me/pdf/2011NCME2_slide.pdf